import os.path

from gxl_ai_utils.utils import utils_file

def do_copy_file_by_file_list_multi_process(input_data_list, output_dir, num_process):
    """Run a list-manifest copy and record the helper's result.

    Delegates to ``utils_file.do_copy_files_by_manifest`` with
    ``manifest_type='list'``, then writes whatever that helper returns to
    ``<output_dir>/shards.list`` via ``utils_file.write_list_to_file``.

    Args:
        input_data_list: Manifest handed unchanged to the copy helper. The
            callers in this file pass the path of a shard list file.
        output_dir: Second positional argument of the copy helper
            (presumably the destination directory -- confirm against the
            helper); ``shards.list`` is written inside it.
        num_process: Worker count, forwarded to the helper as ``num_thread``.

    Returns:
        None.
    """
    copied_entries = utils_file.do_copy_files_by_manifest(
        input_data_list,
        output_dir,
        manifest_type='list',
        num_thread=num_process,
    )
    # NOTE(review): presumably the helper returns the copied shard paths,
    # making shards.list a manifest of the new location -- confirm.
    utils_file.write_list_to_file(
        copied_entries,
        os.path.join(output_dir, 'shards.list'),
    )


def do_data_copy_for_as():
    """Run the shard copy for the hard-coded ``shards_train_list.txt`` manifest.

    Calls ``do_copy_file_by_file_list_multi_process`` with the fixed source
    list path and output directory below, using ``num_process=32``. Takes no
    arguments and returns None.
    """
    do_copy_file_by_file_list_multi_process(
        "/home/work_nfs15/mcshao/workspace/4o/wenet_whisper_finetune/examples/wenetspeech/whisper/data/shards_train_list.txt",
        "/home/node54_tmpdata/xlgeng/asr_data_2w",
        num_process=32,
    )

def do_data_copy_for_sot():
    """Run the shard copy for the hard-coded SOT ``shards_list.txt`` manifest.

    Calls ``do_copy_file_by_file_list_multi_process`` with the fixed source
    list path and output directory below, using ``num_process=32``. Takes no
    arguments and returns None.
    """
    do_copy_file_by_file_list_multi_process(
        "/home/work_nfs13/yhdai/data/4o_data/SOT/shards_list.txt",
        "/home/node54_tmpdata/xlgeng/sot_data",
        num_process=32,
    )
